import commands
import os
import re
import subprocess
import sys

class Reader(object):
    """Base class for readers that query a document through external tools.

    Attributes:
        filename: Path of the document to inspect. Defaults to the empty
            string and is expected to be set per instance.
        extract_command: Executable that ``extractMetadata`` runs on the
            file.
    """

    filename = ''

    # Kept as a class attribute so a differently named or located binary can
    # be substituted without touching extractMetadata().
    extract_command = 'extract'

    # Metadata is read from "<key> - <value>" lines; only these three keys
    # carry a person's name.
    _AUTHOR_LINE = re.compile('^(Author|creator|last saved by) - (?P<author>.*)')

    def extractMetadata(self):
        """Return the distinct author names reported for ``self.filename``.

        Runs ``extract_command`` with the file name as its only argument and
        collects the value of every ``Author``, ``creator`` and
        ``last saved by`` line of its output.

        The file name is handed to the tool as one literal argument rather
        than being pasted into a shell command line, so names containing
        spaces or shell metacharacters are neither split nor interpreted.

        Returns:
            list: The names in order of first appearance, without
            duplicates. Empty when no line matched or when the tool could
            not be started.
        """
        try:
            proc = subprocess.Popen(
                [self.extract_command, self.filename],
                stdout=subprocess.PIPE,
                # Fold stderr into the captured text, as the former
                # commands.getstatusoutput() call did.
                stderr=subprocess.STDOUT,
                universal_newlines=True
            )
        except OSError:
            # Tool missing or not executable: nothing can be extracted.
            return []
        output = proc.communicate()[0]

        users = []
        for line in output.split('\n'):
            found = self._AUTHOR_LINE.match(line)
            if found is None:
                continue
            author = found.group('author')
            if author not in users:
                users.append(author)
        return users
			
class docReader(Reader):
    """Reader that obtains a document's text by running ``catdoc`` on it."""

    def __init__(self, filename):
        """Store the path of the file to read.

        Args:
            filename: Path handed to ``catdoc`` by ``process``.
        """
        self.filename = filename

    def process(self):
        """Return everything ``catdoc`` prints for ``self.filename``.

        The file name is passed as a single literal argument instead of
        being interpolated into a shell command, so names containing spaces
        or shell metacharacters are handled correctly and cannot inject
        commands.

        Returns:
            str: The tool's stdout and stderr combined, with one trailing
            newline removed. If the tool cannot be started, the operating
            system's error message is returned instead of raising, just as
            the shell's error text used to be returned.
        """
        try:
            proc = subprocess.Popen(
                ['catdoc', self.filename],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True
            )
        except OSError as err:
            return str(err)
        text = proc.communicate()[0]
        # commands.getstatusoutput() dropped exactly one trailing newline.
        if text.endswith('\n'):
            text = text[:-1]
        return text

class pdfReader(Reader):
    """Reader stub, presumably for PDF files (judging by the name).

    Only the file name is recorded; ``process`` does no extraction.
    """

    def __init__(self, filename):
        """Store the path of the file this reader refers to.

        Args:
            filename: Path of the file.
        """
        self.filename = filename

    def process(self):
        """Placeholder: performs no work and returns ``None``."""
        return None
class xlsReader(Reader):
    """Reader that obtains a file's content by running ``xls2csv`` on it."""

    def __init__(self, filename):
        """Store the path of the file to read.

        Args:
            filename: Path handed to ``xls2csv`` by ``process``.
        """
        self.filename = filename

    def process(self):
        """Return everything ``xls2csv`` prints for ``self.filename``.

        The file name is passed as a single literal argument instead of
        being interpolated into a shell command, so names containing spaces
        or shell metacharacters are handled correctly and cannot inject
        commands.

        Returns:
            str: The tool's stdout and stderr combined, with one trailing
            newline removed. If the tool cannot be started, the operating
            system's error message is returned instead of raising, just as
            the shell's error text used to be returned.
        """
        try:
            proc = subprocess.Popen(
                ['xls2csv', self.filename],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True
            )
        except OSError as err:
            return str(err)
        text = proc.communicate()[0]
        # commands.getstatusoutput() dropped exactly one trailing newline.
        if text.endswith('\n'):
            text = text[:-1]
        return text

class pptReader(Reader):
    """Reader that obtains a file's text by running ``catppt`` on it."""

    def __init__(self, filename):
        """Store the path of the file to read.

        Args:
            filename: Path handed to ``catppt`` by ``process``.
        """
        self.filename = filename

    def process(self):
        """Return everything ``catppt`` prints for ``self.filename``.

        The file name is passed as a single literal argument instead of
        being interpolated into a shell command, so names containing spaces
        or shell metacharacters are handled correctly and cannot inject
        commands.

        Returns:
            str: The tool's stdout and stderr combined, with one trailing
            newline removed. If the tool cannot be started, the operating
            system's error message is returned instead of raising, just as
            the shell's error text used to be returned.
        """
        try:
            proc = subprocess.Popen(
                ['catppt', self.filename],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True
            )
        except OSError as err:
            return str(err)
        text = proc.communicate()[0]
        # commands.getstatusoutput() dropped exactly one trailing newline.
        if text.endswith('\n'):
            text = text[:-1]
        return text

